@@ -1,5 +1,6 @@
 # Changes
 
+* Jun 19, 2015 - Add `url_from_event` to WebsiteAgent.
 * Jun 17, 2015 - RssAgent emits events for new feed items in chronological order.
 * Jun 15, 2015 - Liquid filter `uri_expand` added.
 * Jun 12, 2015 - RSSAgent can now accept an array of URLs.
@@ -19,7 +19,7 @@ module Agents
 
       `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
 
-      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
+      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or, if you specify `url_from_event`, that value is used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
 
       # Supported Document Types
 
@@ -135,7 +135,8 @@ module Agents
 
     def validate_options
       # Check for required fields
-      errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
+      errors.add(:base, "either url or url_from_event is required") unless options['url'].present? || options['url_from_event'].present?
+      errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
       if !options['extract'].present? && extraction_type != "json"
         errors.add(:base, "extract is required for all types except json")
       end
@@ -257,7 +258,12 @@ module Agents
     def receive(incoming_events)
       incoming_events.each do |event|
         interpolate_with(event) do
-          url_to_scrape = event.payload['url']
+          url_to_scrape =
+            if url_template = options['url_from_event'].presence
+              interpolate_string(url_template)
+            else
+              event.payload['url']
+            end
           check_url(url_to_scrape,
                     interpolated['mode'].to_s == "merge" ? event.payload : {})
         end
@@ -633,6 +633,17 @@ fire: hot
       }.to change { Event.count }.by(1)
     end
 
+    it "should use url_from_event as the url to scrape when it is set and an event is received" do
+      stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
+
+      @checker.options = @valid_options.merge(
+        'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
+      )
+      @checker.receive([@event])
+
+      expect(stub).to have_been_requested
+    end
+
     it "should interpolate values from incoming event payload" do
       expect {
         @valid_options['extract'] = {